rm(list=ls())
library(foreign)
library(simex)
library(arm)
library(Amelia)
library(memisc)
library(Hmisc)
library(apsrtable)
library(Zelig)

setwd("~/Dropbox/ThomsonDEU")
deu<-read.csv("deu15_27_26March2012.csv")

# Separate out EU15 and EU25 for imputation. Do EU15 and EU27 separately.
# Given high degrees of missingness, EP group positions and Bulgaria/Romania not included.

all<-deu[,c(1,6:9,11:28,30:36,43:46,48:65,67:71)]

# Lines 21-24 were run to impute using amelia. To Run again, uncomment 

#allimp<-amelia(all,m=5,cs="prnrnmc",empri=4)
#write.amelia(allimp,file.stem="allimp")
# Average over 5 imputed data sets (wrong, but of little consequence)

imp<-vector("list",5)
imp[[1]]<-read.csv("allimp1.csv",row.names=1)
imp[[2]]<-read.csv("allimp2.csv",row.names=1)
imp[[3]]<-read.csv("allimp3.csv",row.names=1)
imp[[4]]<-read.csv("allimp4.csv",row.names=1)
imp[[5]]<-read.csv("allimp5.csv",row.names=1)

# Weights Pre Enlargement
cwold<-c(4,5,3,3,10,10,5,3,10,2,5,5,8,4,10)/87
cwmatold<-matrix(rep(cwold,173),ncol=length(cwold),byrow=T)

# Weights Post Enlargement # No Bulgaria/Romania
cwnew<-c(10,12,4,12,7,4,7,29,29,12,12,7,29,4,7,4,3,13,27,12,4,7,27,10,29)/321
cwmatnew<-matrix(rep(cwnew,158),ncol=length(cwnew),byrow=T)

# create holding lists
pos15<-vector("list",5)
sal15<-vector("list",5)
respos15<-vector("list",5)
wcouncil15<-vector("list",5)
ep15<-vector("list",5)
com15<-vector("list",5)

pos25<-vector("list",5)
sal25<-vector("list",5)
respos25<-vector("list",5)
wcouncil25<-vector("list",5)
ep25<-vector("list",5)
com25<-vector("list",5)

wcouncil<-vector("list",5)
ep<-vector("list",5)
com<-vector("list",5)

ref<-vector("list",5)
out<-vector("list",5)

mcouncil<-vector("list",5)
changeout<-vector("list",5)
drefwc<-vector("list",5)
drefep<-vector("list",5)
drefcom<-vector("list",5)

dta<-vector("list",5)



for(i in 1:5){

pos15[[i]]<-imp[[i]][1:173,c(2:5,8,10:13,15,16,19,21,23,26:30)]
sal15[[i]]<-imp[[i]][1:173,c(31:34,37,39:42,44,45,48,50,52,55:57)]
respos15[[i]]<-(pos15[[i]][,1:17]-50)*sal15[[i]]
	
wcouncil15[[i]]<-rowSums(respos15[[i]][,3:ncol(respos15[[i]])]*cwmatold)/500
ep15[[i]]<-respos15[[i]]$pep/500
com15[[i]]<-respos15[[i]]$pcom/500

pos25[[i]]<-imp[[i]][174:331,c(2:30)]
sal25[[i]]<-imp[[i]][174:331,c(31:57)]
respos25[[i]]<-(pos25[[i]][,1:27]-50)*sal25[[i]]
	
wcouncil25[[i]]<-rowSums(respos25[[i]][,3:ncol(respos25[[i]])]*cwmatnew)/500
ep25[[i]]<-respos25[[i]]$pep/500
com25[[i]]<-respos25[[i]]$pcom/500

wcouncil[[i]]<-c(wcouncil15[[i]],wcouncil25[[i]])
ep[[i]]<-c(ep15[[i]],ep25[[i]])
com[[i]]<-c(com15[[i]],com25[[i]])
	
## No Sal median council pos (all equal)

calcissuepivot<-function(thres,votes,pos){
	pos<-as.numeric(pos)
	votes<-as.numeric(votes)
	votepos<-as.data.frame(matrix(c(votes,pos),nrow=length(votes)))
	colnames(votepos)<-c("votes","pos")
	voteord<-votepos[order(votepos$pos),]
	voteord$sum<-cumsum(voteord$vote)
	pivpos<-voteord$pos[voteord$sum>=thres]
	return(pivpos[1])
	}

pivcouncil15<-rep(NA,173)
for (j in 1:173){
	pivcouncil15[j]<-calcissuepivot(thres=0.7126,votes=cwold,pos=as.vector(pos15[[i]][j,3:17]))
	}

pivcouncil25<-rep(NA,158)
for (j in 1:158){
	pivcouncil25[j]<-calcissuepivot(thres=0.74,votes=cwnew,pos=as.vector(pos25[[i]][j,3:27]))
	}

pivcouncil<-c(pivcouncil15,pivcouncil25)

mcouncil<-c(apply(pos15[[i]][,3:17],1,median),apply(pos25[[i]][,3:27],1,median))
	
ref<-(imp[[i]][,29]-50)/5
out<-(imp[[i]][,30]-50)/5

cod<-grep("d",deu$prnr,fixed=TRUE)
cod<-ifelse(rownames(deu) %in% cod==TRUE,1,0)

changeout[[i]]<-abs(out-ref)
drefwc[[i]]<-abs(wcouncil[[i]]-ref)
drefep[[i]]<-abs(ep[[i]]-ref)
drefcom[[i]]<-abs(com[[i]]-ref)

dta[[i]]<-as.data.frame(cbind(imp[[i]][,1],cod,wcouncil[[i]],pivcouncil,ep[[i]],com[[i]],ref,out,changeout[[i]],drefwc[[i]],drefep[[i]],drefcom[[i]]))
colnames(dta[[i]])<-c("prnrnmc","cod","wcouncil","pivcouncil","ep","com","ref","out","changeout","drefwc","drefep","drefcom")

}

### Run Models with Zelig for MI purposes

mod1<-zelig(out~ep+com+wcouncil-1,model="ls",data=mi(dta[[1]],dta[[2]],dta[[3]],dta[[4]],dta[[5]]))
summary(mod1)

# Mod1 with robust (sandwich) se's
mod1<-zelig(out~ep+com+wcouncil-1,model="ls",robust=TRUE,data=mi(dta[[1]],dta[[2]],dta[[3]],dta[[4]],dta[[5]]))
summary(mod1)

# Mod1 with random intercepts
library(lme4)
mod1a<-zelig(out~ep+com+wcouncil +tag(1|prnrnmc),model="ls.mixed",data=mi(dta[[1]],dta[[2]],dta[[3]],dta[[4]],dta[[5]]))
summary(mod1a)

### For SIMEX, need to collapse imputed datasets, or run separately.
reddta<-as.data.frame(Reduce('+',dta)/5)

# Mod1 Simex

mod1<-lm(out~ep+com+wcouncil-1,data=reddta)
summary(mod1)

mod1.simexa<-simex(mod1,SIMEXvariable=c("ep"),measurement.error=c(10),asymptotic=F)
summary(mod1.simexa)

mod1.simexa<-simex(mod1,SIMEXvariable=c("ep","com"),measurement.error=c(6,6),asymptotic=F)
summary(mod1.simexa)

# Mod1 with different levels of measurement error in Simex reg
mod1.simexb<-simex(mod1,SIMEXvariable=c("ep","com"),measurement.error=c(10,10),asymptotic=F)
summary(mod1.simexb)

mod1.simexc<-simex(mod1,SIMEXvariable="ep",measurement.error=sd(reddta$ep),asymptotic=F)
summary(mod1.simexc)
## I have also tried a variety of different measurement errors-- both models seem pretty robust to choice of sd.

### Run interaction model
mod3<-zelig(out~ep+com+wcouncil+cod*ep+cod*com-1,model="ls",data=mi(dta[[1]],dta[[2]],dta[[3]],dta[[4]],dta[[5]]))
summary(mod3)


# Run models with distance to reference point

mod5<-zelig(changeout~drefwc+drefep-1,model="ls",data=mi(dta[[1]],dta[[2]],dta[[3]],dta[[4]],dta[[5]]))
summary(mod5)

mod6<-zelig(changeout~drefwc+drefep+drefcom-1,model="ls",data=mi(dta[[1]],dta[[2]],dta[[3]],dta[[4]],dta[[5]]))
summary(mod6)

mod6.simex<-simex(mod6,SIMEXvariable="drefep",measurement.error=6,asymptotic=F)
summary(mod6.simex)


